#setwd('/afs/inf.ed.ac.uk/user/s17/s1725186/Documents/PhD-Models/FirstPUModel/RMarkdowns')

library(tidyverse) ; library(reshape2) ; library(glue) ; library(plotly) ; library(dendextend)
library(RColorBrewer) ; library(viridis) ; require(gridExtra) ; library(GGally) ; library(gplots)
library(WGCNA)
library(expss)
library(polycor)
library(knitr)

SFARI_colour_hue = function(r) {
  pal = c('#FF7631','#FFB100','#E8E328','#8CC83F','#62CCA6','#59B9C9','#b3b3b3','#808080','gray','#d9d9d9')[r]
}

clustering_selected = 'DynamicHybridMergedSmall'
#clustering_selected = 'DynamicHybrid'

cat(paste0('Using clustering ', clustering_selected))
## Using clustering DynamicHybridMergedSmall

Load preprocessed dataset (preprocessing code in 19_10_14_data_preprocessing.Rmd) and clustering (pipeline in 19_10_15_WGCNA.Rmd)

# Gandal dataset
load('./../Data/Gandal/preprocessed_data.RData')
datExpr = datExpr %>% data.frame
DE_info = DE_info %>% data.frame


# GO Neuronal annotations
GO_annotations = read.csv('./../../../PhD-InitialExperiments/FirstYearReview/Data/GO_annotations/genes_GO_annotations.csv')
GO_neuronal = GO_annotations %>% filter(grepl('neuron', go_term)) %>% 
              mutate('ID'=as.character(ensembl_gene_id)) %>% 
              dplyr::select(-ensembl_gene_id) %>% distinct(ID) %>%
              mutate('Neuronal'=1)


# SFARI Genes
SFARI_genes = read_csv('./../Data/SFARI/SFARI_genes_08-29-2019_with_ensembl_IDs.csv')
SFARI_genes = SFARI_genes[!duplicated(SFARI_genes$ID) & !is.na(SFARI_genes$ID),]


# Update DE_info with SFARI and Neuronal information
genes_info = DE_info %>% mutate('ID'=rownames(.)) %>% left_join(SFARI_genes, by='ID') %>% 
  mutate(`gene-score`=ifelse(is.na(`gene-score`), 'None', `gene-score`)) %>%
  left_join(GO_neuronal, by='ID') %>% 
  mutate(Neuronal=ifelse(is.na(Neuronal), 0, Neuronal)) %>%
  mutate(gene.score=ifelse(`gene-score`=='None' & Neuronal==1, 'Neuronal', `gene-score`), 
         significant=padj<0.05 & !is.na(padj))


# Dataset created with DynamicTreeMerged algorithm
dataset = read.csv(paste0('./../Data/Gandal/dataset_', clustering_selected, '.csv'))
dataset$Module = dataset[,clustering_selected]


# GO DEA
# load('./../Data/Gandal/GO_DE_clusters.RData')

rm(DE_info, GO_annotations, clusterings)

Check if for each gene, the module with the smallest distance is their corresponding module

mm = dataset %>% dplyr::select(starts_with('MM.'), starts_with('MMgray')) %>% dplyr::rename(MM.gray = MMgray)
rownames(mm) = dataset$ID
original_max_membership = gsub('MM.', '', colnames(mm)[max.col(mm, ties.method='first')])

cat(paste0('For ', round(100*mean(original_max_membership == gsub('#','',dataset$Module))),
           '% of the genes, their assigned module corresponds to the module with the highest Module Membership'))
## For 53% of the genes, their assigned module corresponds to the module with the highest Module Membership

Apparently this is not the case. Someone asked this in a Bioconductor question and Peter Langfelder answered that this is because WGCNA assigns module labels using dynamic tree cut of hierarchical clustering tree that is based on the Toplogical Overlap Measure. TOM results in similar but not quite the same similarity as correlation, hence for some genes the assigned module may differ from the module with highest kME. But that in all, he doesn’t worry about the module assignment vs. max. kME differences in his own analyses, and he recommends not worrying about it to others as well.

Even when the maximum MM module and the assigned module don’t match, the assigned module is almost always one of the highest ranked Modules by MM, so it’s not that bad that these two metrics don’t match.

mm_rank = matrix(0, nrow=nrow(mm), ncol=ncol(mm))
for(i in 1:nrow(mm_rank)) mm_rank[i,] = rank(-mm[i,], ties.method='min')
colnames(mm_rank) = colnames(mm)
rownames(mm_rank) = rownames(mm)

assigned_module_rank = rep('', nrow(mm_rank))
for(i in 1:nrow(mm_rank)){
  assigned_module_rank[i] = mm_rank[i, paste0('MM.',gsub('#','',dataset$Module[i]))]
}

# Remove genes corresponding to the gray module
assigned_module_rank = assigned_module_rank[dataset$Module!='gray']

plot_data = assigned_module_rank %>% table %>% data.frame
colnames(plot_data) = c('rank', 'frequency')
plot_data = plot_data %>% mutate(rank = as.numeric(as.character(rank))) %>% 
            arrange(rank) %>% mutate('percentage'=round(100*frequency/sum(frequency),2))

ggplotly(plot_data %>% ggplot(aes(rank, frequency, fill=percentage)) + geom_bar(stat='identity') + 
         theme_minimal() + theme(legend.position='none') + ggtitle('Ranking of assigned module by MM') +
         xlab('MM rank by assigned Module'))

Heatmaps of maximum Module Membership and assigned Modules

The palette on the top of the heatmap represents the size of the module, the darker the colour, the larger the module

module_size = dataset %>% group_by(Module) %>% tally %>% mutate(Module = gsub('#', '', Module))
module_size$quant = cut(module_size$n, breaks=9, labels=FALSE)
module_size$meanExpr = sapply(module_size$Module, function(m){mean(rowMeans(datExpr)[dataset$Module==m | dataset$Module==paste0('#',m)])})
module_size$quantME = cut(module_size$meanExpr, breaks=9, labels=FALSE)

heatmap.2(table(original_max_membership, gsub('#','',dataset$Module)), symm=TRUE, dendrogram='none', keysize=1,
          trace='none', scale='none', col=brewer.pal(9,'YlGnBu'), xlab='Assigned Module',  ylab='Highest MM',
          ColSideColors = brewer.pal(9,'PuBu')[module_size$quant])

Scaling by rows it’s easier to see that genes assigned to the larger modules sometimes have a higher module membership with smaller modules

heatmap.2(table(original_max_membership, gsub('#','',dataset$Module)), dendrogram='none', keysize=1, symm=TRUE,
          trace='none', scale='row', col=brewer.pal(9,'YlGnBu'), xlab='Assigned Module',  ylab='Highest MM',
          ColSideColors=brewer.pal(9,'PuBu')[module_size$quant])

Heatmaps of Module Membership by gene

Module Membership by gene (selecting a random sample so it’s not that heavy)

The membership of some module seems to be related to the level of expression of the genes

set.seed(123)
plot_mm = mm %>% sample_frac(0.05) %>% as.matrix
colnames(plot_mm) = gsub('MM.','', colnames(plot_mm))
plot_mm = plot_mm[order(rowMeans(plot_mm)), module_size$Module[order(module_size$n)]]

heatmap.2(plot_mm, xlab('Modules ordered by Size'), ylab('Genes ordered by mean expression'), keysize=1,
          ColSideColors = brewer.pal(9,'PuBu')[module_size$quant[order(module_size$n)]],
          RowSideColors = brewer.pal(9, 'RdPu')[cut(rowMeans(plot_mm), breaks=9, labels=FALSE)],
          trace='none', dendrogram='none', scale='none', Rowv=FALSE, Colv=FALSE, col=brewer.pal(9,'Spectral'))

Letting the heatmap order the genes and modules by distance, it seems like the module size is not an important factor but the mean expression is. The modules in the right leg of the dendrogram seem to be associated to genes with low expression and the ones on the left leg to genes with high expression

heatmap.2(plot_mm, trace='none', col=brewer.pal(9,'Spectral'), dendrogram='column',
          RowSideColors = brewer.pal(9, 'RdPu')[cut(rowMeans(plot_mm), breaks=9, labels=FALSE)],
          ColSideColors = brewer.pal(9,'PuBu')[module_size$quant[order(module_size$n)]])

The biggest modules have the most extreme memberships (both positive and negative)

plot_mm = plot_mm[order(rowMeans(abs(plot_mm))), module_size$Module[order(module_size$n)]]
heatmap.2(abs(plot_mm), ColSideColors = brewer.pal(9,'PuBu')[module_size$quant[order(module_size$n)]], keysize=1, 
          RowSideColors = brewer.pal(9, 'RdPu')[cut(rowMeans(plot_mm), breaks=9, labels=FALSE)],
          trace='none', dendrogram='none', scale='none', Rowv=FALSE, Colv=FALSE, col=brewer.pal(9,'YlOrRd'))

Checking if the mean expression of the modules plays a role

plot_mm = plot_mm[order(rowMeans(plot_mm)), module_size$Module[order(module_size$meanExpr)]]
heatmap.2(plot_mm, ColSideColors = brewer.pal(9,'RdPu')[module_size$quantME[order(module_size$meanExpr)]], keysize=1, 
          RowSideColors = brewer.pal(9, 'RdPu')[cut(rowMeans(plot_mm), breaks=9, labels=FALSE)],
          trace='none', dendrogram='none', scale='none', Rowv=FALSE, Colv=FALSE, col=brewer.pal(9,'Spectral'))

plot_mm = plot_mm[order(rowMeans(plot_mm)), module_size$Module[order(module_size$meanExpr)]]
heatmap.2(plot_mm, ColSideColors = brewer.pal(9,'RdPu')[module_size$quantME[order(module_size$meanExpr)]], keysize=1, 
          RowSideColors = brewer.pal(9, 'RdPu')[cut(rowMeans(plot_mm), breaks=9, labels=FALSE)],
          trace='none', dendrogram='none', scale='none', Rowv=TRUE, Colv=FALSE, col=brewer.pal(9,'Spectral'))

The mean expression pattern seems to be clearer in the samples than in the modules

The colors in the histogram seem to have inverted, with the red being the highest MM and blue de lowest (I have no idea how this happened)

plot_mm = plot_mm[order(rowMeans(plot_mm)), module_size$Module[order(module_size$meanExpr)]]
heatmap.2(plot_mm, ColSideColors = brewer.pal(9,'RdPu')[module_size$quantME[order(module_size$meanExpr)]], keysize=1, 
          RowSideColors = brewer.pal(9, 'RdPu')[cut(rowMeans(plot_mm), breaks=9, labels=FALSE)],
          trace='none', dendrogram='column', scale='none', col=brewer.pal(9,'Spectral'))


Behaviour differences between assigned module and module with maximum MM


Mean Expression by Gene and by Module

Because of the weird inversion in the heatmap palette from above, check if there’s a relation between the gene’s level of expression and the average level of expression of the module it is assigned to: there is and it is positive, so something weird is happening in the heatmaps above

I’m not sure how to compare these two plots …

plot_data = data.frame('ID' = dataset$ID, 'GeneMeanExpr'=rowMeans(datExpr), 'Module' = gsub('#','',dataset$Module)) %>%
            left_join(module_size, by='Module') %>% mutate('ModuleMeanExpr' = meanExpr)

MM_module_size = data.frame('Module' = original_max_membership) %>% group_by(Module) %>% tally()
MM_module_size$meanExpr = sapply(MM_module_size$Module, function(m){mean(rowMeans(datExpr)[original_max_membership==m |
                                                                                           original_max_membership==paste0('#',m)])})

MM_plot_data = data.frame('ID' = dataset$ID, 'GeneMeanExpr'=rowMeans(datExpr), 'Module' = gsub('#','',dataset$Module)) %>%
            left_join(MM_module_size, by='Module') %>% mutate('ModuleMeanExpr' = meanExpr)


p1 = plot_data %>% ggplot(aes(ModuleMeanExpr, GeneMeanExpr)) + geom_point(alpha=0.2, color=gsub('#gray','gray',paste0('#',plot_data$Module))) + 
                   geom_smooth(color='gray') + theme_minimal() + xlab('Assigned Module Mean Expression') + ylab('Gene Mean Expression') +
                   ggtitle(paste0('Cor = ', round(cor(plot_data$ModuleMeanExpr, plot_data$GeneMeanExpr),3),
                                  '  Regression slope = ', round(lm(GeneMeanExpr ~ ModuleMeanExpr, data=plot_data)$coefficients[[2]],2),
                                  '  R^2 = ', round(cor(plot_data$ModuleMeanExpr, plot_data$GeneMeanExpr)^2,3)))

p2 = MM_plot_data %>% ggplot(aes(ModuleMeanExpr, GeneMeanExpr)) + geom_point(alpha=0.2, color=gsub('#gray','gray',paste0('#',plot_data$Module))) + 
                      geom_smooth(color='gray') + theme_minimal() + xlab('Highest MM Module Mean Expression') + ylab('Gene Mean Expression') +
                      ggtitle(paste0('Cor = ', round(cor(MM_plot_data$ModuleMeanExpr, MM_plot_data$GeneMeanExpr),3),
                                  '  Regression slope = ', round(lm(GeneMeanExpr ~ ModuleMeanExpr, data=MM_plot_data)$coefficients[[2]],2),
                                  '  R^2 = ', round(cor(MM_plot_data$ModuleMeanExpr, MM_plot_data$GeneMeanExpr)^2,3)))

grid.arrange(p1, p2, nrow = 1)

rm(p1, p2)

Assigning genes by MM increases the mean expression of the largest modules and decreases the mean expression of the smallest ones

plot_data = module_size %>% dplyr::select(Module, meanExpr, n) %>% rename(AssignedModuleME=meanExpr) %>%
            left_join(MM_module_size, by='Module') %>% rename(MMModuleME=meanExpr)

ggplotly(plot_data %>% ggplot(aes(AssignedModuleME, MMModuleME, size=n.x)) + geom_abline(slope=1, intercept=0, color='gray') +
         geom_point(color=gsub('#gray','gray',paste0('#',plot_data$Module)), alpha=0.5, aes(id=Module)) + ggtitle('Mean Expression') + 
         xlab('Mean Expression of Assigned Module') + ylab('Mean Expression of Module with Higest MM') + theme_minimal() + coord_fixed())

There doesn’t seem to be a relation between module size and level of expression, so the result from above probably has no effect on the level of expression patterns found before

plot_data = module_size %>% dplyr::select(Module, meanExpr, n)

ggplotly(plot_data %>% ggplot(aes(meanExpr, n)) + geom_point(color=gsub('#gray','gray',paste0('#',plot_data$Module))) + 
         geom_smooth(color='gray', alpha=0.1) + theme_minimal())

To see if the strength of the relation between level of expression and assigned module (using the regular WGCNA assignment vs using the maximum MM), I’m going to fit a linear regression to these two module classifications and see which has a better performance.

It seems like the maxMM Module assignment has a stronger relation with mean expression than the originally asigned Modules … but I’m not sure how reliable this results are

lm_data = data.frame('MeanExpr' = log2(rowMeans(datExpr)+1), 'MMModule' = as.factor(original_max_membership), 
                     'assignedModule' = as.factor(gsub('#','',dataset$Module)))

lm_MM = lm(MeanExpr ~ MMModule, data=lm_data[lm_data$MMModule!='gray',])
lm_aM = lm(MeanExpr ~ assignedModule, data=lm_data[lm_data$assignedModule!='gray',])

cat('Results using the maxMM Module:')
## Results using the maxMM Module:
summary(lm_MM)
## 
## Call:
## lm(formula = MeanExpr ~ MMModule, data = lm_data[lm_data$MMModule != 
##     "gray", ])
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.83981 -0.20169  0.03769  0.21803  0.99391 
## 
## Coefficients:
##                 Estimate Std. Error t value Pr(>|t|)    
## (Intercept)     3.090479   0.014779 209.119  < 2e-16 ***
## MMModule00B0F6  0.011800   0.023610   0.500 0.617244    
## MMModule00B6EB  0.057810   0.023325   2.478 0.013205 *  
## MMModule00B70C  0.213295   0.022722   9.387  < 2e-16 ***
## MMModule00BB45  0.058565   0.020231   2.895 0.003798 ** 
## MMModule00BBDD  0.140225   0.022637   6.194 5.99e-10 ***
## MMModule00BD64  0.032734   0.018983   1.724 0.084663 .  
## MMModule00BECD -0.074431   0.021525  -3.458 0.000546 ***
## MMModule00BF7D -0.116130   0.021829  -5.320 1.05e-07 ***
## MMModule00C094 -0.186260   0.019680  -9.465  < 2e-16 ***
## MMModule00C0BB  0.271495   0.019518  13.910  < 2e-16 ***
## MMModule00C1A8  0.092988   0.025936   3.585 0.000338 ***
## MMModule4B9FFF  0.057754   0.019862   2.908 0.003645 ** 
## MMModule53B400  0.179547   0.017727  10.128  < 2e-16 ***
## MMModule75AF00  0.101815   0.022575   4.510 6.52e-06 ***
## MMModule8195FF  0.172702   0.018685   9.243  < 2e-16 ***
## MMModule8EAB00 -0.055586   0.020964  -2.652 0.008020 ** 
## MMModuleA3A500 -0.080720   0.022258  -3.627 0.000288 ***
## MMModuleA58AFF -0.030488   0.021068  -1.447 0.147887    
## MMModuleB4A000 -0.064662   0.024838  -2.603 0.009239 ** 
## MMModuleC17FFF  0.221578   0.017935  12.355  < 2e-16 ***
## MMModuleC49A00  0.108000   0.018660   5.788 7.27e-09 ***
## MMModuleD29300  0.035031   0.018920   1.852 0.064115 .  
## MMModuleD774FD -0.131048   0.024665  -5.313 1.09e-07 ***
## MMModuleDE8C00 -0.077895   0.027104  -2.874 0.004060 ** 
## MMModuleE76BF3  0.027031   0.018741   1.442 0.149233    
## MMModuleE88523 -0.105360   0.023887  -4.411 1.04e-05 ***
## MMModuleF17E4F  0.076575   0.018966   4.037 5.43e-05 ***
## MMModuleF365E6  0.135140   0.018144   7.448 9.95e-14 ***
## MMModuleF8766D  0.015439   0.022055   0.700 0.483925    
## MMModuleFB61D7  0.128640   0.021555   5.968 2.45e-09 ***
## MMModuleFD6F86  0.114806   0.019423   5.911 3.47e-09 ***
## MMModuleFF61C5 -0.133784   0.020241  -6.610 3.97e-11 ***
## MMModuleFF64B2 -0.008628   0.019888  -0.434 0.664395    
## MMModuleFF699D  0.163659   0.022966   7.126 1.08e-12 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.3014 on 16126 degrees of freedom
## Multiple R-squared:  0.1232, Adjusted R-squared:  0.1214 
## F-statistic: 66.67 on 34 and 16126 DF,  p-value: < 2.2e-16
cat('Results using the assigned Module:')
## Results using the assigned Module:
summary(lm_aM)
## 
## Call:
## lm(formula = MeanExpr ~ assignedModule, data = lm_data[lm_data$assignedModule != 
##     "gray", ])
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.88606 -0.20640  0.03891  0.22270  0.94648 
## 
## Coefficients:
##                       Estimate Std. Error t value Pr(>|t|)    
## (Intercept)           3.110595   0.026810 116.025  < 2e-16 ***
## assignedModule00B0F6  0.149851   0.041351   3.624 0.000291 ***
## assignedModule00B6EB  0.110688   0.041478   2.669 0.007625 ** 
## assignedModule00B70C  0.190191   0.030889   6.157 7.58e-10 ***
## assignedModule00BB45  0.043149   0.030760   1.403 0.160713    
## assignedModule00BBDD  0.208979   0.043209   4.836 1.33e-06 ***
## assignedModule00BD64 -0.076429   0.027916  -2.738 0.006192 ** 
## assignedModule00BECD -0.081527   0.041103  -1.983 0.047332 *  
## assignedModule00BF7D -0.093901   0.031729  -2.959 0.003086 ** 
## assignedModule00C094 -0.191956   0.029751  -6.452 1.13e-10 ***
## assignedModule00C0BB  0.221029   0.029092   7.598 3.18e-14 ***
## assignedModule00C1A8  0.331791   0.060508   5.483 4.23e-08 ***
## assignedModule4B9FFF  0.007159   0.029707   0.241 0.809581    
## assignedModule53B400  0.085401   0.027705   3.083 0.002056 ** 
## assignedModule75AF00  0.146637   0.038367   3.822 0.000133 ***
## assignedModule8195FF  0.064730   0.028788   2.249 0.024558 *  
## assignedModule8EAB00 -0.109759   0.038774  -2.831 0.004650 ** 
## assignedModuleA3A500 -0.103021   0.040864  -2.521 0.011709 *  
## assignedModuleA58AFF  0.057693   0.039303   1.468 0.142150    
## assignedModuleB4A000 -0.039886   0.053020  -0.752 0.451900    
## assignedModuleC17FFF  0.116542   0.028091   4.149 3.36e-05 ***
## assignedModuleC49A00  0.051890   0.028551   1.817 0.069162 .  
## assignedModuleD29300  0.083385   0.031916   2.613 0.008993 ** 
## assignedModuleD774FD -0.088074   0.041103  -2.143 0.032147 *  
## assignedModuleDE8C00  0.063322   0.053020   1.194 0.232378    
## assignedModuleE76BF3 -0.050982   0.028297  -1.802 0.071614 .  
## assignedModuleE88523 -0.082744   0.035408  -2.337 0.019459 *  
## assignedModuleF17E4F  0.026795   0.028176   0.951 0.341624    
## assignedModuleF365E6  0.053883   0.028381   1.899 0.057640 .  
## assignedModuleF8766D  0.103814   0.036156   2.871 0.004093 ** 
## assignedModuleFB61D7  0.252664   0.054412   4.644 3.45e-06 ***
## assignedModuleFD6F86  0.094489   0.028613   3.302 0.000961 ***
## assignedModuleFF61C5 -0.135269   0.033166  -4.079 4.55e-05 ***
## assignedModuleFF64B2 -0.041716   0.038136  -1.094 0.274027    
## assignedModuleFF699D  0.176538   0.045017   3.922 8.83e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.3069 on 16387 degrees of freedom
## Multiple R-squared:  0.08766,    Adjusted R-squared:  0.08577 
## F-statistic: 46.31 on 34 and 16387 DF,  p-value: < 2.2e-16

The models are quite similar

coefs_MM = summary(lm_MM)$coefficients %>% data.frame %>% mutate('Module' = gsub('MMModule','',rownames(.)))
coefs_aM = summary(lm_aM)$coefficients %>% data.frame %>% mutate('Module' = gsub('assignedModule','',gsub('#','',rownames(.))))

plot_data = coefs_MM %>% left_join(coefs_aM, by='Module') %>% mutate('color'=ifelse(Module=='(Intercept)', 'gray', paste0('#',Module)))

ggplotly(plot_data %>% ggplot(aes(Estimate.x, Estimate.y)) + geom_point(color=plot_data$color, aes(id=Module)) + 
         geom_abline(slope=1, intercept=0, color='gray') + xlab('Estimate MM Model') + ylab('Estimate assignedModule Model') + 
         theme_minimal() + ggtitle('Relation between coefficients'))

Module Size

Assigning genes by MM balances more the size of the modules, increasing the smallest ones and reducing the largest ones. Mean expression doesn’t seem to be a important factor here.

plot_data = module_size %>% dplyr::select(Module, meanExpr, n) %>% rename(AssignedModuleN=n) %>%
            left_join(MM_module_size, by='Module') %>% rename(MMModuleN=n)

ggplotly(plot_data %>% ggplot(aes(AssignedModuleN, MMModuleN, size=meanExpr.x)) + geom_abline(slope=1, intercept=0, color='gray') +
         geom_smooth(color='gray', se=FALSE) + geom_point(color=gsub('#gray','gray',paste0('#',plot_data$Module)), alpha=0.5, aes(id=Module)) +
         ggtitle('Module Size') + xlab('Size of Assigned Module') + ylab('Size of Module with Higest MM') + theme_minimal())

Conclusion



Session info

sessionInfo()
## R version 3.6.0 (2019-04-26)
## Platform: x86_64-redhat-linux-gnu (64-bit)
## Running under: Scientific Linux 7.6 (Nitrogen)
## 
## Matrix products: default
## BLAS/LAPACK: /usr/lib64/R/lib/libRblas.so
## 
## locale:
##  [1] LC_CTYPE=en_GB.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=en_GB.UTF-8        LC_COLLATE=en_GB.UTF-8    
##  [5] LC_MONETARY=en_GB.UTF-8    LC_MESSAGES=en_GB.UTF-8   
##  [7] LC_PAPER=en_GB.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=en_GB.UTF-8 LC_IDENTIFICATION=C       
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] knitr_1.24            polycor_0.7-10        expss_0.10.1         
##  [4] WGCNA_1.68            fastcluster_1.1.25    dynamicTreeCut_1.63-1
##  [7] gplots_3.0.1.1        GGally_1.4.0          gridExtra_2.3        
## [10] viridis_0.5.1         viridisLite_0.3.0     RColorBrewer_1.1-2   
## [13] dendextend_1.13.2     plotly_4.9.1          glue_1.3.1           
## [16] reshape2_1.4.3        forcats_0.4.0         stringr_1.4.0        
## [19] dplyr_0.8.3           purrr_0.3.3           readr_1.3.1          
## [22] tidyr_1.0.0           tibble_2.1.3          ggplot2_3.2.1        
## [25] tidyverse_1.3.0      
## 
## loaded via a namespace (and not attached):
##   [1] readxl_1.3.1                backports_1.1.5            
##   [3] Hmisc_4.2-0                 plyr_1.8.5                 
##   [5] lazyeval_0.2.2              splines_3.6.0              
##   [7] crosstalk_1.0.0             BiocParallel_1.20.1        
##   [9] GenomeInfoDb_1.22.0         robust_0.4-18.2            
##  [11] digest_0.6.23               foreach_1.4.7              
##  [13] htmltools_0.4.0             GO.db_3.10.0               
##  [15] gdata_2.18.0                fansi_0.4.1                
##  [17] magrittr_1.5                checkmate_1.9.4            
##  [19] memoise_1.1.0               fit.models_0.5-14          
##  [21] cluster_2.0.8               doParallel_1.0.15          
##  [23] annotate_1.64.0             modelr_0.1.5               
##  [25] matrixStats_0.55.0          colorspace_1.4-1           
##  [27] blob_1.2.0                  rvest_0.3.5                
##  [29] rrcov_1.4-7                 haven_2.2.0                
##  [31] xfun_0.8                    crayon_1.3.4               
##  [33] RCurl_1.95-4.12             jsonlite_1.6               
##  [35] genefilter_1.68.0           zeallot_0.1.0              
##  [37] impute_1.60.0               survival_2.44-1.1          
##  [39] iterators_1.0.12            gtable_0.3.0               
##  [41] zlibbioc_1.32.0             XVector_0.26.0             
##  [43] DelayedArray_0.12.2         BiocGenerics_0.32.0        
##  [45] DEoptimR_1.0-8              scales_1.1.0               
##  [47] mvtnorm_1.0-11              DBI_1.1.0                  
##  [49] Rcpp_1.0.3                  xtable_1.8-4               
##  [51] htmlTable_1.13.1            foreign_0.8-71             
##  [53] bit_1.1-15.1                preprocessCore_1.48.0      
##  [55] Formula_1.2-3               stats4_3.6.0               
##  [57] htmlwidgets_1.5.1           httr_1.4.1                 
##  [59] ellipsis_0.3.0              acepack_1.4.1              
##  [61] farver_2.0.3                XML_3.98-1.20              
##  [63] pkgconfig_2.0.3             reshape_0.8.8              
##  [65] nnet_7.3-12                 dbplyr_1.4.2               
##  [67] locfit_1.5-9.1              later_1.0.0                
##  [69] labeling_0.3                tidyselect_0.2.5           
##  [71] rlang_0.4.2                 AnnotationDbi_1.48.0       
##  [73] munsell_0.5.0               cellranger_1.1.0           
##  [75] tools_3.6.0                 cli_2.0.1                  
##  [77] generics_0.0.2              RSQLite_2.2.0              
##  [79] broom_0.5.3                 fastmap_1.0.1              
##  [81] evaluate_0.14               yaml_2.2.0                 
##  [83] bit64_0.9-7                 fs_1.3.1                   
##  [85] robustbase_0.93-5           caTools_1.17.1.2           
##  [87] nlme_3.1-139                mime_0.8                   
##  [89] xml2_1.2.2                  compiler_3.6.0             
##  [91] rstudioapi_0.10             reprex_0.3.0               
##  [93] geneplotter_1.64.0          pcaPP_1.9-73               
##  [95] stringi_1.4.5               lattice_0.20-38            
##  [97] Matrix_1.2-17               vctrs_0.2.1                
##  [99] pillar_1.4.3                lifecycle_0.1.0            
## [101] data.table_1.12.8           bitops_1.0-6               
## [103] httpuv_1.5.2                GenomicRanges_1.38.0       
## [105] R6_2.4.1                    latticeExtra_0.6-28        
## [107] promises_1.1.0              KernSmooth_2.23-15         
## [109] IRanges_2.20.2              codetools_0.2-16           
## [111] MASS_7.3-51.4               gtools_3.8.1               
## [113] assertthat_0.2.1            SummarizedExperiment_1.16.1
## [115] DESeq2_1.26.0               withr_2.1.2                
## [117] S4Vectors_0.24.2            GenomeInfoDbData_1.2.2     
## [119] mgcv_1.8-28                 parallel_3.6.0             
## [121] hms_0.5.3                   grid_3.6.0                 
## [123] rpart_4.1-15                rmarkdown_1.14             
## [125] Cairo_1.5-10                shiny_1.4.0                
## [127] Biobase_2.46.0              lubridate_1.7.4            
## [129] base64enc_0.1-3